Aminet 37

home *** CD-ROM | disk | FTP | other *** search

/ Aminet 37 / Aminet 37 (2000)(Schatztruhe)[!][Jun 2000].iso / Aminet / dev / cross / devpic.lha / devpic / source / picasm / token.c < prev

Wrap

C/C++ Source or Header | 2000-02-27 | 17KB | 823 lines

/* * picasm -- token.c * * Include handling, macro expansion, lexical analysis * * Timo Rossi <trossi@iki.fi> * */ #include <stdio.h> #include <string.h> #include <stdlib.h> #include <ctype.h> #include "picasm.h" /* * keyword table for tokenizer * * this must be in sync with the token definitions in picasm.h */ static char Keyword_Table[] = { "include\0" "macro\0" "endm\0" "exitm\0" "if\0" "else\0" "endif\0" "equ\0" "set\0" "end\0" "org\0" "ds\0" "edata\0" "config\0" "picid\0" "device\0" "defined\0" "streq\0" "isstr\0" "chrval\0" "opt\0" "local\0" "endlocal\0" "error\0" /* 12/14-bit PIC instruction mnemonics */ "addlw\0" "addwf\0" "andlw\0" "andwf\0" "bcf\0" "bsf\0" "btfsc\0" "btfss\0" "call\0" "clrf\0" "clrw\0" "clrwdt\0" "comf\0" "decf\0" "decfsz\0" "goto\0" "incf\0" "incfsz\0" "iorlw\0" "iorwf\0" "movlw\0" "movf\0" "movwf\0" "nop\0" "option\0" "retfie\0" "retlw\0" "return\0" "rlf\0" "rrf\0" "sleep\0" "sublw\0" "subwf\0" "swapf\0" "tris\0" "xorlw\0" "xorwf\0" "\0" }; /* tokenizer definitions & variables */ int tok_char; int token_type, line_buf_off; char token_string[TOKSIZE]; long token_int_val; int ifskip_mode; /* TRUE when skipping code inside if..endif */ /* * include file handling */ void begin_include(char *fname) { struct inc_file *p; p = mem_alloc(sizeof(struct inc_file)); p->type = INC_FILE; p->v.f.fname = mem_alloc(strlen(fname)+1); strcpy(p->v.f.fname, fname); p->linenum = 0; p->cond_nest_count = cond_nest_count; if((p->v.f.fp = fopen(p->v.f.fname, "r")) == NULL) { if(current_file == NULL) { fatal_error("Can't open '%s'", p->v.f.fname); } else { error(0, "Can't open include file '%s'", p->v.f.fname); free(p->v.f.fname); free(p); line_buf_ptr = NULL; tok_char = ' '; return; } } p->next = current_file; current_file = p; line_buf_ptr = NULL; tok_char = ' '; } /* * Move to previous level of include/macro */ void end_include(void) { struct inc_file *p; struct macro_arg *arg1, *arg2; if(current_file != NULL) { if(cond_nest_count != current_file->cond_nest_count) { error(0, "conditional assembly not terminated by ENDIF"); cond_nest_count = current_file->cond_nest_count; } p = current_file->next; if(current_file->type == INC_FILE) { fclose(current_file->v.f.fp); free(current_file->v.f.fname); } else { /* free macro arguments */ arg1 = current_file->v.m.args; while(arg1 != NULL) { arg2 = arg1->next; free(arg1); arg1 = arg2; } } free(current_file); current_file = p; } } /* * Expand a macro */ void expand_macro(struct symbol *sym) { struct inc_file *minc; struct macro_arg *arg; char *cp; int narg; int parcnt, d_char; write_listing_line(0); /* list the macro call line */ minc = mem_alloc(sizeof(struct inc_file)); minc->type = INC_MACRO; minc->v.m.sym = sym; minc->v.m.ml = sym->v.text; minc->linenum = 0; minc->cond_nest_count = cond_nest_count; minc->v.m.args = NULL; minc->v.m.uniq_id = unique_id_count++; arg = NULL; for(narg = 1;;narg++) { while(tok_char != '\n' && isspace(tok_char)) /* skip whitespace */ read_src_char(); if(tok_char == '\n' || tok_char == '\0' || tok_char == ';' || tok_char == EOF) break; cp = line_buf_ptr-1; /* * Macro parameters are separated by commas. However, strings and * character constants (using double and single quotes) * can be used even if they contain commas. Also commas * inside parenthesis (such as function parameter delimiters) * don't count as macro parameter separators. * */ parcnt = 0; /* parenthesis nesting count */ while(!isspace(tok_char) && tok_char != '\n' && tok_char != '\0' && tok_char != ';' && tok_char != EOF) { if(parcnt == 0 && tok_char == ',') break; if(tok_char == '(') { parcnt++; } else if(tok_char == ')') { parcnt--; } else if(tok_char == '"' || tok_char == '\'') { /* quoted string or character constant */ d_char = tok_char; do { read_src_char(); } while(tok_char != d_char && tok_char != '\n' && tok_char != '\0' && tok_char != EOF); if(tok_char != d_char) break; } read_src_char(); } if(narg >= 10) warning("Too many macro arguments (max. 9)"); if(arg == NULL) { arg = mem_alloc(sizeof(struct macro_arg) +(line_buf_ptr-cp-1)); minc->v.m.args = arg; } else { arg->next = mem_alloc(sizeof(struct macro_arg) +(line_buf_ptr-cp-1)); arg = arg->next; } strncpy(arg->text, cp, line_buf_ptr-cp-1); arg->text[line_buf_ptr-cp-1] = '\0'; arg->next = NULL; /* skip whitespace */ while(tok_char != '\n' && isspace(tok_char)) read_src_char(); if(tok_char != ',') break; read_src_char(); } if(tok_char != ';' && tok_char != '\n' && tok_char != '\0' && tok_char != EOF) error(0, "Extraneous characters after a valid source line"); minc->next = current_file; current_file = minc; line_buf_ptr = NULL; tok_char = ' '; get_token(); } /* * Read a character from source file. * Handles includes and macros. */ void read_src_char(void) { char *scp, *pcp, *dcp; int parm; struct macro_arg *arg; static char tmpbuf[12]; if(line_buf_ptr == NULL || *line_buf_ptr == '\0') { if(current_file == NULL) { tok_char = EOF; return; } getc1: if(current_file->type == INC_MACRO) { if(current_file->v.m.ml == NULL) { end_include(); goto getc1; } scp = current_file->v.m.ml->text; dcp = line_buffer; while(*scp != '\0' && dcp < &line_buffer[sizeof(line_buffer)]) { if(*scp == '\\') { scp++; if(*scp >= '1' && *scp <= '9') { /* macro arg */ parm = *scp - '1'; /* macro arg #, starting from 0 */ for(arg = current_file->v.m.args; arg != NULL && parm > 0; arg = arg->next, parm--); if(arg != NULL) { for(pcp = arg->text; *pcp != '\0' && dcp < &line_buffer[sizeof(line_buffer)];) *dcp++ = *pcp++; } scp++; } else if(*scp == '0' || *scp == '@') { sprintf(tmpbuf, "%03d", current_file->v.m.uniq_id); for(pcp = tmpbuf; *pcp != '\0' && dcp < &line_buffer[sizeof(line_buffer)];) *dcp++ = *pcp++; scp++; } else if(*scp == '#') { /* number of arguments */ for(parm = 0, arg = current_file->v.m.args; arg != NULL; arg = arg->next, parm++); sprintf(tmpbuf, "%d", parm); for(pcp = tmpbuf; *pcp != '\0' && dcp < &line_buffer[sizeof(line_buffer)];) *dcp++ = *pcp++; scp++; } else *dcp++ = *scp; } else *dcp++ = *scp++; } if(dcp == &line_buffer[sizeof(line_buffer)]) { error(0, "Line buffer overflow"); dcp--; } *dcp = '\0'; /* NUL-terminate the line */ current_file->v.m.ml = current_file->v.m.ml->next; } else { if(fgets(line_buffer, sizeof(line_buffer)-1, current_file->v.f.fp) == NULL) { if(current_file->next != NULL) { end_include(); goto getc1; } tok_char = EOF; return; } } current_file->linenum++; line_buf_ptr = line_buffer; } tok_char = ((unsigned char)(*line_buf_ptr++)); } /* * Lexical analyzer * Returns the next token from the source file */ void get_token(void) { int tp, base; char *cp; for(;;) { /* * skip spaces */ while(tok_char != '\n' && isspace(tok_char)) read_src_char(); if(tok_char == EOF) { token_type = TOK_EOF; token_string[0] = '\0'; return; } if(tok_char != ';') break; /* comment */ line_buf_ptr = NULL; tok_char = '\n'; } /* for(;;) */ /* * character constant (integer) * (does not currently handle the quote character) */ if(tok_char == '\'') { read_src_char(); token_string[0] = tok_char; read_src_char(); if(tok_char != '\'') goto invalid_token; read_src_char(); token_string[1] = '\0'; token_int_val = (long)((unsigned char)token_string[0]); token_type = TOK_INTCONST; return; } if(tok_char == '"') { /* string constant (include filename) */ read_src_char(); tp = 0; while(tp < TOKSIZE-1 && tok_char != '"' && tok_char != EOF) { token_string[tp++] = tok_char; read_src_char(); } if(tok_char != '\"' && !ifskip_mode) error(0, "String not terminated"); token_string[tp] = '\0'; read_src_char(); token_type = TOK_STRCONST; return; } /* * integer number */ if(isdigit(tok_char)) { token_type = TOK_INTCONST; token_string[0] = tok_char; tp = 1; read_src_char(); if(token_string[0] == '0') { if(tok_char == 'x' || tok_char == 'X') { /* hex number */ token_string[tp++] = tok_char; read_src_char(); while(tp < TOKSIZE-1 && isxdigit(tok_char)) { token_string[tp++] = tok_char; read_src_char(); } token_string[tp] = '\0'; token_int_val = strtoul(&token_string[2], NULL, 16); /* should put range check here */ return; } } while(tp < TOKSIZE-2 && isxdigit(tok_char)) { token_string[tp++] = tok_char; read_src_char(); } base = 10; switch(tok_char) { case 'H': /* hex */ case 'h': base = 16; /* hex */ token_string[tp++] = tok_char; read_src_char(); break; case 'O': /* octal */ case 'o': base = 8; /* octal */ token_string[tp++] = tok_char; read_src_char(); break; default: if(token_string[0] == '0' && (token_string[1] == 'b' || token_string[1] == 'B')) { token_string[tp] = '\0'; token_int_val = strtoul(&token_string[2], &cp, 2); if(cp != &token_string[tp] && !ifskip_mode) error(0, "Invalid digit in a number"); /* should put range check here */ return; } else if(token_string[tp-1] == 'B' || token_string[tp-1] == 'b') { base = 2; } else { if(token_string[tp-1] != 'D' && token_string[tp-1] != 'd') token_string[tp++] = '\0'; } break; } token_string[tp] = '\0'; token_int_val = strtoul(token_string, &cp, base); if(cp != &token_string[tp-1] && !ifskip_mode) error(0, "Invalid digit in a number"); /* should put range check here */ return; } /* * Handle B'10010100' binary etc. */ if((tok_char == 'b' || tok_char == 'B' || tok_char == 'd' || tok_char == 'D' || tok_char == 'h' || tok_char == 'H' || tok_char == 'o' || tok_char == 'O') && line_buf_ptr != NULL && *line_buf_ptr == '\'') { token_string[0] = tok_char; read_src_char(); token_string[1] = tok_char; read_src_char(); tp = 2; while(tp < TOKSIZE-1 && isxdigit(tok_char)) { token_string[tp++] = tok_char; read_src_char(); } if(tok_char != '\'') goto invalid_token; token_string[tp++] = tok_char; read_src_char(); token_string[tp] = '\0'; switch(token_string[0]) { case 'b': case 'B': base = 2; break; case 'o': case 'O': base = 8; break; case 'h': case 'H': base = 16; break; case 'd': case 'D': default: base = 10; break; } token_int_val = strtoul(&token_string[2], &cp, base); if(cp != &token_string[tp-1] && !ifskip_mode) error(0, "Invalid digit in a number"); /* should put range check here */ token_type = TOK_INTCONST; return; } /* * keyword or identifier */ if(tok_char == '_' || tok_char == '.' || isalpha(tok_char)) { line_buf_off = (line_buf_ptr - &line_buffer[1]); token_string[0] = tok_char; tp = 1; read_src_char(); if(token_string[0] == '.' && tok_char != '_' && !isalnum(tok_char)) { token_string[1] = '\0'; token_type = TOK_PERIOD; return; } while(tp < TOKSIZE-1 && (tok_char == '_' || tok_char == '.' || isalnum(tok_char))) { token_string[tp++] = tok_char; read_src_char(); } token_string[tp] = '\0'; token_type = FIRST_KW; cp = Keyword_Table; while(*cp) { if(strcasecmp(token_string, cp) == 0) return; while(*cp++) ; token_type++; } token_type = TOK_IDENTIFIER; return; } /* * non-numeric & non-alpha tokens */ switch(tok_char) { case '\n': case '\0': token_type = TOK_NEWLINE; strcpy(token_string, "\\n"); skip_eol(); return; case '<': token_string[0] = tok_char; read_src_char(); if(tok_char == '<') { token_string[1] = tok_char; token_string[2] = '\0'; token_type = TOK_LSHIFT; read_src_char(); return; } if(tok_char == '=') { token_string[1] = tok_char; token_string[2] = '\0'; token_type = TOK_LESS_EQ; read_src_char(); return; } if(tok_char == '>') { token_string[1] = tok_char; token_string[2] = '\0'; token_type = TOK_NOT_EQ; read_src_char(); return; } token_type = TOK_LESS; token_string[1] = '\0'; return; case '>': token_string[0] = tok_char; read_src_char(); if(tok_char == '>') { token_string[1] = tok_char; token_string[2] = '\0'; token_type = TOK_RSHIFT; read_src_char(); return; } if(tok_char == '=') { token_string[1] = tok_char; token_string[2] = '\0'; token_type = TOK_GT_EQ; read_src_char(); return; } token_string[1] = '\0'; token_type = TOK_GREATER; return; case '!': token_string[0] = tok_char; read_src_char(); if(tok_char != '=') goto invalid_token; token_string[1] = tok_char; token_string[2] = '\0'; read_src_char(); token_type = TOK_NOT_EQ; return; case '=': token_string[0] = tok_char; read_src_char(); if(tok_char == '=') { token_string[1] = tok_char; read_src_char(); token_string[2] = '\0'; token_type = TOK_EQ; return; } if(tok_char == '<') { token_string[1] = tok_char; read_src_char(); token_string[2] = '\0'; token_type = TOK_LESS_EQ; return; } if(tok_char == '>') { token_string[1] = tok_char; read_src_char(); token_string[2] = '\0'; token_type = TOK_GT_EQ; return; } if(tok_char == '_' || tok_char == '.' || isalpha(tok_char)) { /* local symbol */ line_buf_off = (line_buf_ptr - &line_buffer[2]); token_string[0] = tok_char; tp = 1; read_src_char(); while(tp < TOKSIZE-1 && (tok_char == '_' || tok_char == '.' || isalnum(tok_char))) { token_string[tp++] = tok_char; read_src_char(); } token_string[tp] = '\0'; token_type = TOK_LOCAL_ID; return; } token_string[1] = '\0'; token_type = TOK_EQUAL; return; case '$': read_src_char(); if(!isxdigit(tok_char)) { token_string[0] = '$'; token_string[1] = '\0'; token_type = TOK_DOLLAR; return; } tp = 0; do { token_string[tp++] = tok_char; read_src_char(); } while(tp < TOKSIZE-1 && isxdigit(tok_char)); token_string[tp] = '\0'; token_int_val = strtoul(&token_string[1], NULL, 16); token_type = TOK_INTCONST; /* should put range check here */ return; case '\\': token_type = TOK_BACKSLASH; break; case ',': token_type = TOK_COMMA; break; case '(': token_type = TOK_LEFTPAR; break; case ')': token_type = TOK_RIGHTPAR; break; case '+': token_type = TOK_PLUS; break; case '-': token_type = TOK_MINUS; break; case '&': token_type = TOK_BITAND; break; case '|': token_type = TOK_BITOR; break; case '^': token_type = TOK_BITXOR; break; case '~': token_type = TOK_BITNOT; break; case '*': token_type = TOK_ASTERISK; break; case '/': token_type = TOK_SLASH; break; case '%': token_type = TOK_PERCENT; break; case ':': token_type = TOK_COLON; break; case '[': token_type = TOK_LEFTBRAK; break; case ']': token_type = TOK_RIGHTBRAK; break; default: goto invalid_token; } token_string[0] = tok_char; token_string[1] = '\0'; read_src_char(); return; invalid_token: if(!ifskip_mode) error(0, "Invalid token"); token_string[0] = '\0'; token_type = TOK_INVALID; } /* skip to the next line */ void skip_eol(void) { line_buf_ptr = NULL; tok_char = ' '; }